* Drop any data currently in memory before reading the raw file.
clear

*do ${fmartorell_home}/top_program.do

* Close any log that is still open (e.g. from an interrupted earlier run or a
* calling do-file). -log using- aborts when a log is already open; -capture-
* suppresses the error raised by -log close- when no log is open.
capture log close
*log using ${fmartorell_home}/remediation/programs/readin_nes2.log, replace
log using ${d1}log/readin_nes2.log, replace

* Program reads in NES data, eliminates duplicate records, checks the NES attemptnum and retest 
* variables against constructed measures, reshapes the data to have one record per altpid, 
* and examines the number of months between consecutive attempts among the first 4 test attempts

/* *******************************************************
Read in the NES data
********************************************** */
*infile using ${fmartorell_home}/remediation/programs/NES_filelayout_test.dct
* Read the raw records using the layout given in the dictionary file.
infile using ${d1}NES_filelayout_test.dct

* NOTE(review): the field named "region" by the dictionary is treated as sex
* from here on -- confirm against the file layout.
rename region sex
compress

* Frequency checks on the date-of-birth components.
tab dob_m
tab dob_d
tab dob_y

* Frequency checks on the administration-date components.
tab admindate_y
tab admindate_m
tab admindate_d

* String version of the birth date. Month and day are zero-padded to two
* digits so that distinct dates cannot collapse to the same string
* (unpadded, 1/11/1970 and 11/1/1970 would both become "1111970").
* str8 assumes dob_y has at most four digits.
gen str8 dob=string(dob_m,"%02.0f")+string(dob_d,"%02.0f")+string(dob_y)
* Numeric Stata date built from the same components; missing when they do
* not form a valid date.
gen bthday=mdy(dob_m,dob_d,dob_y)
format bthday %dD/N/Y


*** Check for duplicate records ***
* A stable sort keeps records that tie on every sort key in their original
* file order, so the record retained below is the same on every run
* (-bysort- alone breaks ties randomly).
sort altpid admindate_y admindate_m admindate_d scalescore_sec1 scalescore_sec2 scalescore_sec3, stable

* Number of records sharing person, administration date and all three scale scores.
by altpid admindate_y admindate_m admindate_d scalescore_sec1 scalescore_sec2 scalescore_sec3: gen numrec_admdatescr=_N
tab numrec_admdatescr

* Number of records sharing person and administration date, whatever the scores.
by altpid admindate_y admindate_m admindate_d: gen numrec_admdate=_N
tab numrec_admdate

* Keep one record per person and administration date: the first in the sort
* order above (lowest scale scores first, then original file order).
by altpid admindate_y admindate_m admindate_d: keep if _n==1
drop numrec_*
*save ${fmartorell_home}/remediation/data/NESSCORE.dta, replace
save ${d1}data/NESSCORE.dta, replace

* Close the current log (if any) and re-open the same file in append mode.
capture log close
*log using ${fmartorell_home}/remediation/programs/readin_nes2.log, append
log using ${d1}log/readin_nes2.log, append

**** Do records with the same altpid agree on dob, sex, and ethnicity? ***
* flag marks exactly one record per altpid so per-person tabulations count
* each person once; numrec is the number of records the person has.
egen flag=tag(altpid)
by altpid: generate numrec=_N

* For each attribute, order the person's records by that attribute and compare
* the first value with the last: they differ exactly when the person has more
* than one distinct value. Tabulate among people with more than one record.
foreach v of varlist sex ethnic bthday {
    bysort altpid (`v'): generate byte diff`v' = (`v'[1] != `v'[_N])
    tabulate diff`v' if numrec>1 & flag==1
}

 *When sex, ethnicity or dob do not agree, just assign that of 1st record (very rare, so this should be ok)
* Put each person's records in chronological order, then copy the values from
* the earliest record onto all of that person's records.
sort altpid admindate_y admindate_m admindate_d
foreach v of varlist sex ethnic dob bthday {
    by altpid: replace `v' = `v'[1]
}

* Constructed attempt number: position of the record within the person's
* chronologically ordered records. Compare it with the NES-supplied attemptnum.
by altpid: generate attemptnum_fromdata = _n
tabulate attemptnum_fromdata
tabulate attemptnum_fromdata attemptnum if attemptnum<=5
* Same comparison restricted to people whose birth dates agree across records.
tabulate attemptnum_fromdata attemptnum if diffbthday==0 & attemptnum<=5

* Compare the NES retest flag with both attempt-number measures.
tabulate attemptnum retest if numrec>1, col nofreq
tabulate attemptnum_fromdata retest if numrec>1, col nofreq
tabulate attemptnum_fromdata retest, col nofreq
tabulate retest if attemptnum_fromdata==1



/* *******************************************************************
Reshape data so it has one record per altpid
************************************************************************ */

* Distribution of the number of attempts per person. maxatt is computed before
* records are trimmed below, so it can exceed 8.
egen maxatt=max(attemptnum_fromdata), by(altpid)
tabulate maxatt if flag==1

* Retain at most the first 8 attempts per person.
keep if attemptnum_fromdata<=8

* Keep the identifier, the person-level attributes and the per-attempt variables.
keep altpid dob bthday ethnic sex admindate* attemptnum* retest pass* scalescore* wholistic maxatt

* Per-attempt variables become one column per attempt, suffixed with the
* constructed attempt number; the remaining variables stay one per altpid.
local perattempt admindate_y admindate_m admindate_d attemptnum retest ///
    pass_sec1 pass_sec2 pass_sec3 ///
    scalescore_sec1 scalescore_sec2 scalescore_sec3 wholistic
reshape wide `perattempt', i(altpid) j(attemptnum_fromdata)

sort altpid
compress


/* **********************************************************************
Check the distribution of gaps in between test attempts, distribution of test scores,
smoothness of ethnicity variables
******************************************************************** */

* Tabulate the gap, in calendar months, between consecutive attempts
* (1 to 2, 2 to 3, 3 to 4). Only year and month enter the calculation;
* the gap is missing when either attempt is absent.
** Note: this does not distinguish between retest attempts on any particular section
forvalues i=1/3 {
    local ip1=`i'+1
    display ""
    display "Gap between attempt `i' and `ip1'"
    * Temporary variable, rebuilt for each pair of attempts and dropped after tabulation.
    quietly generate int gap=.
    quietly replace gap=12*(admindate_y`ip1'-admindate_y`i')+(admindate_m`ip1'-admindate_m`i')
    tabulate gap
    drop gap
}
*save ${fmartorell_home}/remediation/data/tasp_uniquessn.dta, replace
save ${d1}data/tasp_uniquessn.dta, replace


/*
*** Identify which scale scores appear to be populated with reasonable N ***
gen one=1
forvalues i=1/3 {
 egen numperscore`i'=count(one), by(admindate_y1 admindate_m1 admindate_d1 scalescore_sec1`i')
}

 keep altpid ethnic dob sex *1 *2
gen byte one=1


forvalues i=1/5 {
 gen dethnic`i'=ethnic==`i'
}
gen byte male=sex=="M"
collapse dethnic* male (sum) one, by(scalescore_sec11 admindate_y1 admindate_m1 admindate_d1)
egen sum=sum(one) if scalescore_sec1>0 & scalescore_sec1<., by(admindate_y1 admindate_m1 admindate_d1)
gen pdf_admin=one/sum


gen byte one=1
keep dethnic* scalescore_sec*1  male one pre95




gen pre95=admindate_y1<1995
keep if admindate_y1!=1995
forvalues i=1/3 {
 preserve
 collapse dethnic* male (sum) one, by(scalescore_sec`i'1 pre95)
 save ${fmartorell_home}/tmp/sec`i'clps, replace
 restore
}
*/

* Close the log opened with -log using- earlier in this do-file.
log close

